library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(countrycode)
# Load the raw survey export; readr guesses the column types while reading.
df <- readr::read_csv(file = "survey_results_public.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character(),
## Respondent = col_double(),
## Age = col_double(),
## CompTotal = col_double(),
## ConvertedComp = col_double(),
## WorkWeekHrs = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
# Keep only the survey columns used in this analysis.
DFF <- df %>%
  select(
    MainBranch, Country, Gender, Age, YearsCode, YearsCodePro, EdLevel,
    Employment, JobSat, OrgSize, WorkWeekHrs, NEWOvertime, NEWOnboardGood,
    JobSeek, NEWLearn, LanguageWorkedWith, PlatformWorkedWith
  )
# Professional developers located in the United States. filter() discards rows
# where a condition is NA, so no explicit is.na() guards are needed. The two
# filter columns (MainBranch, Country) are dropped afterwards.
DF_USA <- DFF %>%
  filter(
    Country == "United States",
    MainBranch == "I am a developer by profession"
  ) %>%
  select(Gender:PlatformWorkedWith)
# Tag every respondent with the region that countrycode derives from the
# country name. The result has `region` as column 1, followed by the 17
# columns of DFF.
DF_LC <- cbind(
  region = as.character(countrycode(
    sourcevar = DFF$Country,
    origin = "country.name",
    destination = "region"
  )),
  DFF
)
## Warning in countrycode(sourcevar = DFF$Country, origin = "country.name", : Some values were not matched unambiguously: Nomadic
# Professional developers in Latin America & the Caribbean.
# The row mask is built only from DF_LC's own columns (the original mixed in
# DFF$MainBranch, which is correct only while both frames share row order).
# `%in%` yields FALSE for NA, so unmatched countries and missing MainBranch
# values are excluded without separate is.na() checks.
# Columns 4:18 drop region, MainBranch and Country, leaving Gender through
# PlatformWorkedWith.
DF_LC <- DF_LC[
  DF_LC$region %in% "Latin America & Caribbean" &
    DF_LC$MainBranch %in% "I am a developer by profession",
  4:18
]
# Combined frame: every respondent tagged with its countrycode region
# (`region` is column 1, followed by the DFF columns).
DF_MIX <- cbind(
  region = as.character(countrycode(
    sourcevar = DFF$Country,
    origin = "country.name",
    destination = "region"
  )),
  DFF
)
## Warning in countrycode(sourcevar = DFF$Country, origin = "country.name", : Some values were not matched unambiguously: Nomadic
# Keep professional developers from either Latin America & Caribbean or the
# United States. `%in%` is FALSE for NA, which reproduces the explicit
# `== ... & !is.na(...)` pairs. Columns 3:18 drop region and MainBranch.
DF_MIX <- DF_MIX[
  with(
    DF_MIX,
    (region %in% "Latin America & Caribbean" | Country %in% "United States") &
      MainBranch %in% "I am a developer by profession"
  ),
  3:18
]
# Collapse Country into a two-level label: every non-US row kept above
# belongs to Latin America & Caribbean.
DF_MIX$Country <- ifelse(
  DF_MIX$Country == "United States",
  "Estados Unidos",
  "América Latina"
)
names(DF_MIX)[names(DF_MIX) == "Country"] <- "Region"
# Print the combined frame.
DF_MIX
| Pregunta | Variable | Tipo de Variable |
|---|---|---|
| Which of the following describe you, if any? Please check all that apply. If you prefer not to answer, you may leave this question blank. | Género | Nominal |
| What is your age (in years)? If you prefer not to answer, you may leave this question blank. | Edad | Discreta |
| Including any education, how many years have you been coding in total? | Años codeando | Discreta |
# Gender is a multi-select question: one answer may hold several options
# separated by ";". Expand the non-missing answers of each region into one
# entry per selected option. strsplit() is vectorised, so this replaces the
# nested loops that grew the result with c() on every iteration.
gender_vec_USA <- unlist(
  strsplit(DF_USA$Gender[!is.na(DF_USA$Gender)], ";", fixed = TRUE)
)
gender_vec_LC <- unlist(
  strsplit(DF_LC$Gender[!is.na(DF_LC$Gender)], ";", fixed = TRUE)
)
# NOTE(review): `gender_vec` is never read in the visible part of the file and
# looks like a leftover of `gender_vec_LC`; kept so that any later reference
# still resolves. Remove once confirmed unused.
gender_vec <- vector()
# Side-by-side pie charts of the gender split (USA vs. Latin America) with a
# shared legend in a thin bottom panel.

# Draw one pie for a vector of gender answers, labelling each slice with its
# percentage share (2 decimals). Labels are derived from the table itself, so
# they always line up with the slices instead of relying on positions 1..3.
draw_gender_pie <- function(genders, title, fill) {
  counts <- table(genders)
  pct_labels <- paste0(round(prop.table(counts) * 100, 2), "%")
  pie(counts, main = title, labels = pct_labels, col = fill)
}

# Slice colours follow the alphabetical order in which table() sorts the
# categories; the legend labels below assume that same order.
gender_cols <- c("skyblue", "orange", "red")

# Save the graphical parameters so the layout and the zero margins set for
# the legend panel do not leak into later plots on the same device.
old_par <- par(no.readonly = TRUE)

# Top row: two pies; bottom row: one panel spanning both columns.
layout(matrix(c(1, 2, 3, 3), ncol = 2, byrow = TRUE), heights = c(6, 1))
par(mai = rep(0.5, 4))
draw_gender_pie(gender_vec_USA, "Estados Unidos", gender_cols)
draw_gender_pie(gender_vec_LC, "América Latina", gender_cols)

# Empty margin-less panel that only hosts the shared legend.
par(mai = c(0, 0, 0, 0))
plot.new()
legend(
  x = "center", ncol = 3, legend = c("Hombre", "Otros", "Mujer"),
  fill = gender_cols
)

par(old_par)
# Age distribution per region, with each region's mean age as a horizontal
# reference line. boxplot() already ignores missing values, and `na.rm` is
# not one of its arguments, so it is no longer passed.
boxplot(Age ~ Region, data = DF_MIX, xlab = "Region", ylab = "Edad")
abline(h = mean(DF_USA$Age, na.rm = TRUE), col = "red", lwd = 2)
abline(h = mean(DF_LC$Age, na.rm = TRUE), col = "blue", lwd = 2)
legend(
  1.2, 100,
  legend = c("media EEUU", "Media SyC"),
  col = c("red", "blue"), lwd = 2
)
Podemos observar que las edades de los programadores profesionales están más concentradas en América Latina comparando los rangos intercuartílicos:
# Interquartile range of age per region (missing ages ignored).
IQR(DF_LC[["Age"]], na.rm = TRUE)
## [1] 9
IQR(DF_USA[["Age"]], na.rm = TRUE)
## [1] 12
Además, la edad promedio de los programadores profesionales de América Latina es de 30.14 años, mientras que la de Estados Unidos es de 34.33:
# Mean age per region, rounded to two decimals (missing ages ignored).
DF_LC$Age %>% mean(na.rm = TRUE) %>% round(digits = 2)
## [1] 30.14
DF_USA$Age %>% mean(na.rm = TRUE) %>% round(digits = 2)
## [1] 34.33
Junto con las medianas, podemos concluir que, en general, los programadores profesionales de Estados Unidos son mayores que los de América Latina.
# Median age per region, rounded to two decimals (missing ages ignored).
DF_LC$Age %>% median(na.rm = TRUE) %>% round(digits = 2)
## [1] 28.5
DF_USA$Age %>% median(na.rm = TRUE) %>% round(digits = 2)
## [1] 32
# Total years coding per region, with each region's mean as a reference line.
# YearsCode is read as text, hence the as.numeric(); any answer that is not a
# plain number is coerced to NA (with a warning) and left out of the plot.
# boxplot() already ignores missing values and has no `na.rm` argument, so it
# is no longer passed.
boxplot(
  as.numeric(YearsCode) ~ Region,
  data = DF_MIX, xlab = "Region", ylab = "Años Codeando"
)
# lwd = 2 matches the line width advertised by the legend below.
abline(h = mean(as.numeric(DF_USA$YearsCode), na.rm = TRUE), col = "red", lwd = 2)
abline(h = mean(as.numeric(DF_LC$YearsCode), na.rm = TRUE), col = "blue", lwd = 2)
legend(
  1.1, 50,
  legend = c("media EEUU", "Media SyC"),
  col = c("red", "blue"), lwd = 2
)
En el diagrama de cajas y bigotes podemos ver cómo los programadores profesionales de Estados Unidos llevan más años programando en total en comparación con los de América Latina.
También es importante destacar la gran diferencia de 6 años que existe entre el tercer cuartil de ambas regiones. Esto implica que el 75% de los programadores profesionales de Estados Unidos cuenta a lo mucho con 22 años programando, mientras que en América Latina ese valor es de solo 16.
# Quartiles of total years coding per region (values that fail numeric
# conversion become NA and are ignored).
DF_LC$YearsCode %>% as.numeric() %>% quantile(na.rm = TRUE) %>% round(digits = 2)
## 0% 25% 50% 75% 100%
## 1 7 10 16 50
DF_USA$YearsCode %>% as.numeric() %>% quantile(na.rm = TRUE) %>% round(digits = 2)
## 0% 25% 50% 75% 100%
## 1 8 14 22 50